#!/usr/bin/python
# -*- coding: utf-8 -*-

"""
Simple caching classes.

See: https://secure.wikimedia.org/wikipedia/en/wiki/Cache_algorithms

Instead of doing::

    cache = {}

    ...
    try:
        return cache[key]
    except KeyError:
        cache[key] = value = load(key)
        return value

or maybe::

    cache = {}
    return cache.get(key, load(key)) # won't cache the default value

now it can be done more simply::

    def factory_on_miss(key):
        value = load(key)
        return value

    cache = Cache(factory_on_miss)

    value = cache[key]


Also this module provides a decorator:

    TODO: description of decorator

These caching classes use the default_factory feature introduced
with python 2.5. Therefore this module works with python 2.5 and greater.




"""

__version__ = "0.0.2.dev"
__author__ = "dr0iddr0id {at} gmail [dot] com (C) 2012"


import logging
logger = logging.getLogger('pyknic.cache')

# use the public API instead of assigning the 'level' attribute directly
# (setLevel() also invalidates the logging module's internal caches)
logger.setLevel(logging.DEBUG)
if __debug__:
    # lazy %-style arguments: the message is only formatted if DEBUG is enabled
    logger.debug('%s loading ...', __name__)
    import time
    # remember when the module import started, see the footer at the bottom
    _START_TIME = time.time()

#  -----------------------------------------------------------------------------

import collections
from collections import defaultdict as _defaultdict
import weakref

#  -----------------------------------------------------------------------------


# WARNING: this is a naive implementation using a list as internal datastructure
# class LRUCache(_defaultdict):
    # """
    # Least recently used (LRU) cache. Holds a certain number of entries. When
    # it has to cache more entries than it can hold, it removes the least used
    # entry to make room for the new entry. This does not mean, that the removed
    # entry is also deleted necessarily from memory, there could still exist
    # other references to it. But eventually it will be deleted from memory.

    # .. todo:: change capacity to be a function to evaluate:
                                        # is_over_limit(LRUCache) -> True/False
    # .. todo:: is there more efficient implementation? (faster?) yes, linked lists, see:
            # http://www.algorithm.co.il/blogs/programming/python/lru-cache-solution-a-case-for-linked-lists-in-python/

                # IDEA1: use a container with attributes value and access_count, by __getitem__ just increase
                       # the access_count, when __missing__ then use the expensive sort to sort by access_count
                       # -> cached values should be retrieved faster, miss is a bit more expensive
                       # ?: access_count balance between items? only increase it if it is not the most recent
                       # accessed element?
                       # NO: sorting would need to go over all elements, O(n)

                # IDEA2: swap the keys in __recents instead of remove/append <<= not correct!?
                       # -> L[a], L[b] = L[b], L[a]
                       # ?: but need to find index of key in __recents, might defeat it
                       # NO: swapping wont work since it destroys the order

                # IDEA3: just append the recently used key to __recents (it will grow and have duplicates)
                       # -> when __missing__ then remove duplicates maintainig order
                       # NO: removing and filtering is to costy, probably need to go over all elements


    # .. todo:: document hint about cache trashing

    # """

    # def __init__(self, miss_factory, capacity):
        # """
        # :Parameters:
            # miss_factory : function
                # the factory funtions with signature: func(key) -> return value
                # for that key
            # capacity : int
                # number of items to cache

        # """
        # self.__recents = [] # [oldest, ..., recent]
        # self.__capacity = capacity
        # _defaultdict.__init__(self)
        # self.default_factory = miss_factory
        # # optimization
        # self.__recents_append = self.__recents.append
        # self.__recents_remove = self.__recents.remove
        # self.__recents_pop = self.__recents.pop

    # def __getitem__(self, key):
        # # this can cause a keyError as when using a dict
        # value = _defaultdict.__getitem__(self, key)
        # if key == self.__recents[-1]:
            # return value
        # self.__recents_remove(key)
        # self.__recents_append(key)
        # return value

    # def __missing__(self, key):
        # if len(self) == self.__capacity:
            # del_key = self.__recents_pop(0)
            # if __debug__:
                # if logger.level <= logging.DEBUG: # this if for performance, to avoid .debug method call if not needed
                    # logger.debug("removing '%s' from LRU(%s)" %
                                       # (str(del_key), hex(id(self))) )
            # _defaultdict.__delitem__(self, del_key)
        # if __debug__:
            # if logger.level <= logging.DEBUG: # this if for performance, to avoid .debug method call if not needed
                # logger.debug("adding '%s' tp LRU(%s)" %
                                   # (str(key), hex(id(self))) )
        # self.__recents_append(key)
        # value = self.default_factory(key)
        # self[key] = value
        # return value

    # def _get_lru(self):
        # """
        # Returns the lru-list [most recent, ..., oldest]
        # """
        # return list(reversed([(k, self[k]) for k in list(self.__recents)]))
    # lru = property(_get_lru, doc=\
                # """get the lru list [most recent, ...., oldest] of \
 # (key, value tuples), read only""")

    # def __delitem__(self, key):
        # raise NotImplementedError("Not necessary for LRU Cache")

#  -----------------------------------------------------------------------------

_CacheInfo = collections.namedtuple('CacheInfo', 'hits, misses, maxsize, currsize')

class LRUCache(_defaultdict):
    """
    Least recently used (LRU) cache. Holds a certain number of entries. When
    it has to cache more entries than it can hold, it removes the least
    recently used entry to make room for the new entry. This does not mean
    that the removed entry is necessarily also deleted from memory, there
    could still exist other references to it. But eventually it will be
    deleted from memory.

    Usage::

        cache = cache.LRUCache(miss_factory, 4) # this holds 4 elements at max

        value = cache[1]
        value = cache[2]
        value = cache[3]
        value = cache[4]
        value = cache[5] # would remove key 1 since it is the oldest

    Implementation: a circular, doubly linked list with a sentinel keeps the
    usage order (sentinel.next is the most recently used node, sentinel.prev
    the least recently used one); the inherited dict maps key -> node, so
    both lookup and reordering are O(1). See:
    http://www.algorithm.co.il/blogs/programming/python/lru-cache-solution-a-case-for-linked-lists-in-python/

    .. todo:: document hint about cache thrashing
    .. todo:: check all inherited methods if they work as expected
    .. todo:: check that miss_factory function/method has only one argument (multiple arguments are not supported)
    .. todo:: document LRUCache.instances and its use

    """

    class _Node(object):
        """Node of the internal circular doubly linked list."""
        def __init__(self):
            self.next = None   # neighbor towards the least recently used end
            self.prev = None   # neighbor towards the most recently used end
            self.value = None  # the cached value
            self.key = None    # the key (a tuple, see __getitem__) this node is stored under

        def __repr__(self):
            return str(self.value)

        def __str__(self):
            # attention: do not render self.next and self.prev with their own __str__ here,
            # since the nodes form a closed loop that would exceed the recursion limit;
            # only their ids are printed instead
            return "<{0} at {1} [next:{2}, prev:{3}, key:{4}, value:{5}]>".format(self.__class__.__name__, \
                                        hex(id(self)), hex(id(self.next)), hex(id(self.prev)), self.key, self.value)

    # all LRUCache instances that are created register to this WeakValueDictionary
    # using their id(); the weak values make sure this registry does not keep the
    # caches alive (used by enable_global)
    instances = weakref.WeakValueDictionary() # {id(lru_cache_instance): lru_cache_instance}

    def __init__(self, miss_factory, capacity):
        """
        :Parameters:
            miss_factory : function
                the factory function with signature: func(key) -> value for
                that key, called once for every key that is not cached yet
            capacity : int or None
                number of items to cache, if set to None it will grow unbounded as using a dict would

        """
        assert id(self) not in LRUCache.instances, 'same LRUCache instance registered twice'
        # register this instance so enable_global() can reach it
        LRUCache.instances[id(self)] = self

        # circular linked list, starting with the sentinel pointing to itself
        self.__sentinel = self._Node()
        self.__sentinel.next = self.__sentinel # most recent key
        self.__sentinel.prev = self.__sentinel # oldest key

        # saving the 'nodes' in a default dict as {key:node}, the value is stored in the node
        if isinstance(capacity, int):
            assert capacity > 0, 'capacity should be > 0'
        self.__capacity = capacity
        _defaultdict.__init__(self)
        self.default_factory = miss_factory

        if __debug__:
            if logger.isEnabledFor(logging.DEBUG): # avoid the .debug call (and formatting) if not needed
                logger.debug("%s %s created with miss_factory '%s'" %
                            (hex(id(self)), self.__class__.__name__, str(miss_factory)) )
        # when False, lookups bypass the cache and call the factory directly
        self._enabled = True

        # statistics and comfort variables
        # NOTE: this used to be '__wrapped__ = self.default_factory' which only
        # bound a useless local variable; expose the wrapped factory on the
        # instance instead (same convention as functools.wraps)
        self.__wrapped__ = miss_factory
        self._hits = 0
        self._misses = 0


    def __getitem__(self, *key):
        """
        Returns the value for the key, loading it through the miss factory if
        it is not cached yet.

        :Parameters:
            key : object
                the key of the value to return; multiple positional arguments
                are combined into one tuple key (used by the lru_cached
                decorator)
        :returns: value for that key
        """
        if __debug__:
            if logger.isEnabledFor(logging.DEBUG): # avoid the .debug call (and formatting) if not needed
                logger.debug("{0}.__getitem__, {1}, key: {2} enabled: {3}".format(\
                                                        self.__class__.__name__, self, key, self._enabled))
        if not self._enabled:
            # caching disabled: always ask the factory, do not touch statistics
            return self.default_factory(*key)

        # if a miss occurs then 1 is subtracted in the __missing__ method
        self._hits += 1
        # may trigger __missing__ (which loads the value through the factory)
        node = _defaultdict.__getitem__(self, key)
        # if the requested key is already the most recent one then
        # nothing has to be done besides returning the value
        if node is self.__sentinel.next:
            if __debug__:
                if logger.isEnabledFor(logging.DEBUG):
                    logger.debug("%s hit most recent '%s:%s'" % (hex(id(self)), str(key), str(node.value)) )
            return node.value
        # otherwise move the corresponding node to the most recent place:
        # unlink it first ...
        node.prev.next = node.next
        node.next.prev = node.prev
        # ... then insert it as sentinel.next (most recent)
        node.next = self.__sentinel.next
        node.next.prev = node
        self.__sentinel.next = node
        node.prev = self.__sentinel
        if __debug__:
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug("%s hit (moved to most recent) '%s:%s'" % (hex(id(self)), str(key), str(node.value)) )
        return node.value

    def __missing__(self, key):
        """
        Internal method, only called by dict.__getitem__ when a key is
        missing in the dictionary.

        :Parameters:
            key : object
                the key (tuple) that is missing
        :returns: the (new or recycled) node holding the freshly loaded value
        """
        # this is only called if a key is missing in the dict!
        self._misses += 1
        self._hits -= 1 # __getitem__ always counts up regardless of a miss
        if __debug__:
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug("%s: miss for key '%s'" %
                            (hex(id(self)), str(key)) )

        # find out whether to add a new item or recycle an existing one
        if self.__capacity is None or len(self) < self.__capacity:
            # only create a new node if needed
            node = self._Node()
            # pre-link next and prev at the oldest position so __getitem__
            # can treat the node like any other (no special casing there)
            node.next = self.__sentinel
            node.prev = self.__sentinel.prev
            if __debug__:
                if logger.isEnabledFor(logging.DEBUG):
                    logger.debug("%s: creating additional node '%s'" %
                                       (hex(id(self)), hex(id(node))) )
        else:
            # re-use the oldest node of the linked list, preventing a costly memory allocation
            node = self.__sentinel.prev
            # no need to unlink the node here since it will be
            # unlinked and re-inserted in __getitem__
            if __debug__:
                if logger.isEnabledFor(logging.DEBUG):
                    logger.debug("%s: capacity exceeded, removing '%s' (re-using node %s)" %
                                                                        (hex(id(self)), str(key), hex(id(node))) )
            # remove the node from the dict using the 'old' key
            _defaultdict.__delitem__(self, node.key)

        if __debug__:
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug("%s: adding '%s'" %
                                   (hex(id(self)), str(key)) )

        # (re-)use the node and assign it the newly loaded value
        node.value = self.default_factory(*key)
        if __debug__:
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug("miss factory(%s) called with key: '%s' returned: '%s'" %
                            (hex(id(self)), str(key), node.value) )

        node.key = key
        _defaultdict.__setitem__(self, key, node)
        return node

    def get(self, *key):
        """
        def get(key, default)

        Returns the cached value for the key, or the default value (the last
        positional argument) if the key is not cached.

        :note: neither the key nor the default value will be added to the cache!
        :note: unlike __getitem__ this does not update the LRU order.
        """
        default = key[-1]
        key = tuple(key[:-1])
        # the sentinel is used as marker so a cached value equal to 'default'
        # is still counted as a hit
        node = _defaultdict.get(self, key, self.__sentinel)
        if node is self.__sentinel: # miss
            if __debug__:
                if logger.isEnabledFor(logging.DEBUG):
                    logger.debug("%s miss, returning default value for key '%s:%s'" %
                                                        (hex(id(self)), str(key), str(default)) )
            self._misses += 1
            return default
        if __debug__:
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug("%s hit '%s:%s'" % (hex(id(self)), str(key), str(node.value)) )
        self._hits += 1
        return node.value

    def _get_lru(self):
        """
        Returns the ordered list of (key, value) tuples [most recent, ..., oldest]

        :Note: the key might be a tuple too, if used as a decorator
        """
        keys = []
        node = self.__sentinel
        while node.next is not self.__sentinel:
            node = node.next
            key = node.key
            # single-element key tuples are unwrapped for convenience
            keys.append((key if len(key) > 1 else key[0], node.value))
        assert len(keys) == len(self), "keys and internal node count differ"
        return keys
    lru = property(_get_lru, doc=\
                """get the lru list [most recent, ...., oldest] of \
 (key, value) tuples, read only""")

    def __delitem__(self, key):
        """Deleting single entries is not supported."""
        raise NotImplementedError("Not necessary for LRU Cache")
    def __setitem__(self, key, value):
        """Direct assignment is not supported, values are loaded through the miss factory."""
        # implement? insert new node or override existing value of keyed node
        raise NotImplementedError("Not necessary for LRU Cache")

    def __contains__(self, *val):
        """Returns True if the key can be found in the cache, otherwise False."""
        return _defaultdict.__contains__(self, val)

    def clear(self):
        """
        Clears the cache and resets the statistics of hits and misses.
        """
        logger.debug("%s clearing LRUCache" %(hex(id(self))))
        _defaultdict.clear(self)
        # un-hook the sentinel: without this the linked list would still
        # reference the old nodes and get out of sync with the (now empty)
        # dict, corrupting the lru list on subsequent use
        self.__sentinel.next = self.__sentinel
        self.__sentinel.prev = self.__sentinel
        self._hits = 0
        self._misses = 0

    # alias matching the functools.lru_cache naming convention
    cache_clear = clear

    def cache_info(self):
        """
        To help measure the effectiveness of the cache and tune the maxsize parameter, the wrapped function
        is instrumented with a cache_info() function that returns a named tuple
        showing hits, misses, maxsize and currsize.

        :returns: CacheInfo(hits=3, misses=8, maxsize=20, currsize=8)
        """
        return _CacheInfo(self._hits, self._misses, self.__capacity, len(self))

    def enable(self, value):
        """
        Enable or disable caching.

        :Parameters:
            value : bool
                if True, then the cache is enabled, otherwise not.

        """
        if __debug__:
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug("enable: {0}".format(value))
        self._enabled = value

    def is_enabled(self):
        """Returns whether the cache is enabled or not."""
        return self._enabled

    @staticmethod
    def enable_global(value):
        """
        Enable or disable all instances of LRUCache globally.

        :Parameters:
            value : bool
                if True enables all instances of the LRUCache class globally, otherwise disables them.

        """
        # valuerefs() instead of the Python-2-only itervaluerefs(): it exists
        # on Python 2.5+ as well as Python 3
        for cache in LRUCache.instances.valuerefs():
            c_ref = cache() # weak reference, the cache may have died already
            if c_ref is not None:
                c_ref.enable(value)

#  -----------------------------------------------------------------------------


from functools import wraps

def lru_cached(maxsize):
    '''
    LRU-caching method/function decorator.

    ``maxsize`` is handed to the underlying :class:`LRUCache` as its capacity.
    It uses a descriptor (the internal ``Descript`` class) to delay the
    definition of the method wrapper until the attribute is first looked up;
    each instance whose method is decorated gets its own LRUCache.

    ..todo: document that only the *args are respected and **kwargs are ignored for caching
    ..todo: inspect possibility to also use the kwargs for key: use kwargs as:
            key = [(instance,)] + args + tuple(sorted(kwds.items()))

    '''
    class Descript(LRUCache):
        # Descript is itself an LRUCache (used directly when a plain function
        # is decorated and called) and at the same time a descriptor (used
        # when a method is decorated, see __get__).
        def __init__(self, func, maxsize):
            LRUCache.__init__(self, func, maxsize)
            if __debug__:
                logger.debug("{0}.__init__, function: {1}, maxsize: {2}".format(self.__class__.__name__, func, maxsize))
            self.func = func        # the decorated function (also the miss factory)
            self.maxsize = maxsize  # capacity handed on to per-instance caches
            self.cache = self       # expose the cache itself under a 'cache' attribute
            
        def __get__(self, instance, klass):
            # descriptor protocol: called when the decorated method is looked
            # up on a class or an instance
            if __debug__:
                logger.debug("{0}.__get__, {1}, {2}, {3}".format(self.__class__.__name__, self, instance, klass))
            if instance is None:
                # Class method was requested
                return self.make_unbound(klass)
            # Instance method was requested
            return self.make_bound(instance)

        def __call__(self, *args, **kw):
            # called when a plain (module level) function was decorated;
            # NOTE(review): **kw is ignored for the cache key, see the todos above
            # args = (args, tuple([item for item in kw.items()]))
            if __debug__:
                logger.debug("{0}.__call__, {1}, {2}".format(self.__class__.__name__, args, kw))
            # Function was requested
            return self.__getitem__(*args)

        def make_unbound(self, klass):
            """
            Use the function as an unbound method: not supported, the returned
            wrapper raises a TypeError when called!
            """
            if __debug__:
                logger.debug("{0}.make_unbound, {1}, {2}".format(self.__class__.__name__, self, klass))
            @wraps(self.func)
            def wrapper(*args, **kwargs):
                '''This documentation will vanish :)'''
                if __debug__:
                    logger.debug("{0}.make_unbound: wrapper, {1}, {2}".format(self.__class__.__name__, args, kwargs))
                raise TypeError(
                    'unbound method {0}() must be called with {1} instance '
                    'as first argument (got nothing instead)'.format(
                        self.func.__name__,
                        klass.__name__)
                )
            return wrapper

        def make_bound(self, instance):
            """
            The function belongs to a specific instance of a class, bind it to
            that instance (with its own, fresh LRUCache).
            """
            if __debug__:
                logger.debug("{0}.make_bound, {1}, {2}".format(self.__class__.__name__, self, instance))

            class Wrapper(object):
                """
                A wrapper class to be able to cache multiple arguments as a key
                (not possible through the '[]' operator).
                """
                def __init__(self, instance, cache):
                    self.instance = instance  # the bound instance, part of every cache key
                    self.cache = cache        # the per-instance LRUCache

                def __call__(self, *args, **kwargs):
                    # todo: use kwargs also for the key?
                    if __debug__:
                        logger.debug("bound wrapper: {0}.__call__, {1}, {2}, {3}".format(\
                                                                    self.__class__.__name__, self, args, kwargs))
                    # the instance is part of the key, so different instances do not share entries
                    # return self.cache[(self.instance, ) + args]
                    return self.cache.__getitem__(self.instance,  *args)

            wrapper = Wrapper(instance, LRUCache(self.func, self.maxsize))
            # cache the bound wrapper on the instance itself so subsequent
            # lookups find it directly and bypass __get__
            setattr(instance, self.func.__name__, wrapper)
            return wrapper

    def _create(func):
        """
        Wrapper method to delegate the real calls to an instance of the internal Descript class.
        """
        return Descript(func, maxsize)

    if __debug__:
        logger.debug("lru_cached decorator, {0}".format(maxsize))
    return _create

#  -----------------------------------------------------------------------------



#  -----------------------------------------------------------------------------

if __debug__:
    # report how long importing/executing this module took, see _START_TIME
    # near the top of the file
    _DELTA = time.time() - _START_TIME
    # lazy %-style arguments: only formatted if DEBUG logging is enabled
    logger.debug('%s loaded: %fs', __name__, _DELTA)

